import requests
from lxml.etree import HTML
import time

# Crawl the novel index page and write one line per novel to ./novel.txt1.
# Each line is the repr of a dict holding the novel's name and URL and, when
# its detail page could be fetched and parsed, its category and author.
# The elapsed wall-clock time is printed at the end.

# Seconds to wait on each HTTP request; without a timeout requests can block
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

t1 = time.time()
# The output encoding is pinned to UTF-8 so non-ASCII titles never depend on
# the platform's default encoding. A single Session is shared by all requests
# so connections to the host are reused instead of reopened per novel.
with open("./novel.txt1", "w", encoding="utf-8") as f, requests.Session() as session:
    url = "http://www.xbiquge.la/xiaoshuodaquan/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/79.0.3945.117 Mobile Safari/537.36"}
    session.headers.update(headers)

    r = session.get(url, timeout=REQUEST_TIMEOUT)
    # Decode the index page the same way as the detail pages below instead of
    # relying on the charset requests guesses from the response headers.
    r.encoding = 'utf-8'
    text = HTML(r.text)
    # Only anchors that have both an href and a text node are selected, so the
    # [0] lookups below cannot fail on a malformed entry and abort the crawl.
    novel_infos = text.xpath("//div[@class='novellist']//a[@href][text()]")

    for i, novel in enumerate(novel_infos, start=1):
        novel_url = novel.xpath("./@href")[0]
        novel_name = novel.xpath("./text()")[0]
        novel_info = {"novel_name": novel_name, "novel_url": novel_url}
        try:
            r = session.get(novel_url, timeout=REQUEST_TIMEOUT)
            r.encoding = 'utf-8'
            text = HTML(r.text)
            kind = text.xpath("//div[@class='con_top']/a[2]/text()")[0]
            # [7:] drops a fixed 7-character prefix from the first <p> under
            # #info -- presumably the author label; verify against the page.
            author = text.xpath("//div[@id='info']//p[1]/text()")[0][7:]
        except (IndexError, requests.RequestException) as exc:
            # Best effort: a missing element or a network failure on a single
            # detail page must not stop the crawl. The record is still written
            # below with just the name and URL.
            print("detail page skipped for {}: {!r}".format(novel_url, exc))
        else:
            # Both fields are added together, only when both were extracted.
            novel_info["novel_kind"] = kind
            novel_info["novel_author"] = author
        f.write(str(novel_info) + "\n")
        print("获取第{}条数据成功！".format(i))
t2 = time.time()
print("耗时{}秒".format(t2-t1))


# Detail-page debugging: fetch one novel page and print its category and author
# url = "http://www.xbiquge.la/14/14831/"
# headers = {
#         "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) "
#                       "Chrome/79.0.3945.117 Mobile Safari/537.36"}
# r = requests.get(url, headers=headers)
# r.encoding = 'utf-8'
# text = HTML(r.text)
# kind = text.xpath("//div[@class='con_top']/a[2]/text()")[0]
# print(kind)
# author = text.xpath("//div[@id='info']//p[1]/text()")[0][7:]
# print(author)
